0. Requirements

Note: Please load the workspace in the directory implementation/R/workspace/preprocessing.RData to run the following code and re-use the previously created variables. Furthermore, the following libraries must be installed and loaded:

# install necessary packages
#install.packages("quanteda")
#install.packages("readtext")
#install.packages("tidyverse")
#install.packages("quanteda.textstats")
#install.packages("quanteda.textplots")
#install.packages("data.table")
#install.packages("stringr")
#install.packages("spacyr")
#install.packages("textcat")
#install.packages("plyr")

# load libraries
library(quanteda)
library(readtext)
Registered S3 method overwritten by 'data.table':
  method           from
  print.data.table     
library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
  method         from
  print.tbl_lazy     
  print.tbl_sql      
── Attaching packages ───────────────────────────────────────────────────── tidyverse 1.3.1 ──
✓ ggplot2 3.3.5     ✓ purrr   0.3.4
✓ tibble  3.1.6     ✓ dplyr   1.0.8
✓ tidyr   1.2.0     ✓ stringr 1.4.0
✓ readr   2.1.2     ✓ forcats 0.5.1
── Conflicts ──────────────────────────────────────────────────────── tidyverse_conflicts() ──
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()
library(quanteda.textplots)
library(quanteda.textstats)
library(plyr)
--------------------------------------------------------------------------------------------
You have loaded plyr after dplyr - this is likely to cause problems.
If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
library(plyr); library(dplyr)
--------------------------------------------------------------------------------------------

Attache Paket: ‘plyr’

Die folgenden Objekte sind maskiert von ‘package:dplyr’:

    arrange, count, desc, failwith, id, mutate, rename, summarise, summarize

Das folgende Objekt ist maskiert ‘package:purrr’:

    compact
library(dplyr)
#library(stringr)
#library(data.table)
#library(textcat)

1. Collocations

To retrieve the direct collocations, i.e. one token to the left or right of the compound word, we use the kwic function offered by quanteda. Here we can choose a window of 1 to make sure we obtain the correct number of collocations.

1.1 First Look

Let’s have a first look at the collocations for the example “Klimaleugner” (en: “climate denier”). We are going to retrieve the collocations to the left (pre) and to the right (post) of the key word and count their occurrences. Then, we will output the Top-5 collocations for each category, i.e. pre and post.

# keyword whose direct neighbours (one token on each side) we inspect
word <- "klimaleugner"

# window-1 concordance in the C2022 corpus
kwic_con <- as_tibble(
  kwic(sp_c2022_tokens, pattern = word, window = 1, valuetype = "fixed")
)

# window-1 concordance in the P2022 corpus
kwic_pro <- as_tibble(
  kwic(sp_p2022_tokens, pattern = word, window = 1, valuetype = "fixed")
)

Let’s show the Top-5 for the C2022 corpus:

# Top-5 tokens directly preceding the keyword in C2022.
# The dplyr verbs are namespace-qualified because plyr is attached after dplyr
# and masks arrange() and desc(), just like count().
kwic_con %>%
  dplyr::count(pre) %>%
  dplyr::arrange(dplyr::desc(n)) %>%
  head(n = 5)

# Top-5 tokens directly following the keyword in C2022
kwic_con %>%
  dplyr::count(post) %>%
  dplyr::arrange(dplyr::desc(n)) %>%
  head(n = 5)

And the Top-5 for the P2022 corpus:

# Top-5 tokens directly preceding the keyword in P2022.
# The dplyr verbs are namespace-qualified because plyr is attached after dplyr
# and masks arrange() and desc(), just like count().
kwic_pro %>%
  dplyr::count(pre) %>%
  dplyr::arrange(dplyr::desc(n)) %>%
  head(n = 5)

# Top-5 tokens directly following the keyword in P2022
kwic_pro %>%
  dplyr::count(post) %>%
  dplyr::arrange(dplyr::desc(n)) %>%
  head(n = 5)

1.2 Apply to all Glossary Terms

Now we seek to create tables that contain the top 5 pre and post collocations for each of our compound words. First, we create a table for the collocations we can obtain from P2022.

# For every compound, collect the top-5 preceding ("pre") and following
# ("post") collocations in P2022. Each result row holds the collocate (word),
# its count (n), the compound it belongs to (keyword) and the side (tag).
# The per-compound pieces go into a preallocated list and are combined once at
# the end instead of growing a data frame with rbind() on every iteration.
pro_colls_parts <- vector("list", 2L * length(compounds))
part_idx <- 0L

# for each compound
for (word in compounds) {

  # get collocations (one token to the left and right)
  kwic_pro <- kwic(sp_p2022_tokens, pattern = word, window = 1, valuetype = "fixed") %>%
    as_tibble()
  keyword <- word

  # retrieve top5 preceding collocations
  # (dplyr:: prefixes guard against the plyr versions that mask these verbs)
  pro_pre <- kwic_pro %>%
    dplyr::count(pre) %>%
    dplyr::arrange(dplyr::desc(n)) %>%
    head(n = 5)

  # retrieve top5 following collocations
  pro_post <- kwic_pro %>%
    dplyr::count(post) %>%
    dplyr::arrange(dplyr::desc(n)) %>%
    head(n = 5)

  # bring both tables into the same shape: word, n, keyword, tag
  pro_pre$keyword <- keyword
  pro_pre$tag <- "pre"
  names(pro_pre)[names(pro_pre) == "pre"] <- "word"

  pro_post$keyword <- keyword
  pro_post$tag <- "post"
  names(pro_post)[names(pro_post) == "post"] <- "word"

  pro_colls_parts[[part_idx + 1L]] <- pro_pre
  pro_colls_parts[[part_idx + 2L]] <- pro_post
  part_idx <- part_idx + 2L
}

# stack all pieces into the final table
pro_colls10 <- dplyr::bind_rows(pro_colls_parts)

Most of the collocations only occur exactly once. Since this is not very informative for us, we remove all the collocations with a count of exactly 1. Also, we want to remove noise, i.e. empty strings from the collocations.

# only keep collocations that appear more than once
top_colls_pro <- pro_colls10[pro_colls10$n > 1, ]

# remove empty or whitespace-only collocates; nzchar() tests for emptiness
# directly instead of relying on a locale-dependent comparison against " "
top_colls_pro <- top_colls_pro[nzchar(trimws(top_colls_pro$word)), ]

And save the table to a csv file.

#write.csv(top_colls_pro, "../output/top_collocations_pro.csv")

Then, we create the same table of the top 5 pre and post collocations for the C2022.

# For every compound, collect the top-5 preceding ("pre") and following
# ("post") collocations in C2022. Each result row holds the collocate (word),
# its count (n), the compound it belongs to (keyword) and the side (tag).
# The per-compound pieces go into a preallocated list and are combined once at
# the end instead of growing a data frame with rbind() on every iteration.
con_colls_parts <- vector("list", 2L * length(compounds))
part_idx <- 0L

# for each compound
for (word in compounds) {

  # get collocations (one token to the left and right)
  kwic_con <- kwic(sp_c2022_tokens, pattern = word, window = 1, valuetype = "fixed") %>%
    as_tibble()
  keyword <- word

  # retrieve top5 preceding collocations
  # (dplyr:: prefixes guard against the plyr versions that mask these verbs)
  con_pre <- kwic_con %>%
    dplyr::count(pre) %>%
    dplyr::arrange(dplyr::desc(n)) %>%
    head(n = 5)

  # retrieve top5 following collocations
  con_post <- kwic_con %>%
    dplyr::count(post) %>%
    dplyr::arrange(dplyr::desc(n)) %>%
    head(n = 5)

  # bring both tables into the same shape: word, n, keyword, tag
  con_pre$keyword <- keyword
  con_pre$tag <- "pre"
  names(con_pre)[names(con_pre) == "pre"] <- "word"

  con_post$keyword <- keyword
  con_post$tag <- "post"
  names(con_post)[names(con_post) == "post"] <- "word"

  con_colls_parts[[part_idx + 1L]] <- con_pre
  con_colls_parts[[part_idx + 2L]] <- con_post
  part_idx <- part_idx + 2L
}

# stack all pieces into the final table
con_colls10 <- dplyr::bind_rows(con_colls_parts)

And, just like before, we remove the collocations that appeared only once in the corpus (and remove noise, i.e. empty strings from the collocations).

# only keep collocations that appear more than once
top_colls_con <- con_colls10[con_colls10$n > 1, ]

# remove empty or whitespace-only collocates; nzchar() tests for emptiness
# directly instead of relying on a locale-dependent comparison against " "
top_colls_con <- top_colls_con[nzchar(trimws(top_colls_con$word)), ]

And save the final table to a csv file.

# export the filtered C2022 collocation table
write.csv(x = top_colls_con, file = "../output/top_collocations_con1.csv")

2. Concordances (KWIC)

To retrieve the context of each compound word, we extract the concordances on a sentence level. That means, we extract a window of 5 sentences to the left and to the right of the keyword sentence. To do this, we must tokenize our data by sentences, instead of words.

2.1 Preprocessing

Since we cannot normalize the data the same way when we tokenize it on sentence-level, we firstly create word tokens from the corpora.

# Word-level tokens for both corpora. Punctuation is kept; symbols, numbers,
# URLs and separators are removed.
p2022_tokens <- tokens(
  pro2022,
  remove_punct = FALSE,
  remove_symbols = TRUE,
  remove_numbers = TRUE,
  remove_url = TRUE,
  remove_separators = TRUE
)

c2022_tokens <- tokens(
  contra2022,
  remove_punct = FALSE,
  remove_symbols = TRUE,
  remove_numbers = TRUE,
  remove_url = TRUE,
  remove_separators = TRUE
)

To these tokens, we apply a normalization step where we remove hyphens within words, such as “Klima-Skeptiker” to convert it to “Klimaskeptiker”.

# Remove hyphens inside words (e.g. "Klima-Skeptiker" -> "Klimaskeptiker") in
# both corpora, then rebuild one text per document from the cleaned tokens.

### P2022
# join token sequences matching the "*-*" pattern without a separator
toks_comp_p <- tokens_compound(p2022_tokens, phrase("*-*"), concatenator = "")

# get the token types that contain a hyphen between word characters
toks_hyphenated_p <- grep("\\w+-\\w+", types(toks_comp_p), value = TRUE)

# replace the hyphenated types by their versions without hyphen
p2022_toks_cleaned <- tokens_replace(
  toks_comp_p,
  toks_hyphenated_p,
  gsub("-", "", toks_hyphenated_p)
)

### C2022
# join token sequences matching the "*-*" pattern without a separator
toks_comp_c <- tokens_compound(c2022_tokens, phrase("*-*"), concatenator = "")

# get the token types that contain a hyphen between word characters
toks_hyphenated_c <- grep("\\w+-\\w+", types(toks_comp_c), value = TRUE)

# replace the hyphenated types by their versions without hyphen
c2022_toks_cleaned <- tokens_replace(
  toks_comp_c,
  toks_hyphenated_c,
  gsub("-", "", toks_hyphenated_c)
)

# merge tokens back into corpus objects (one space-separated text per document);
# vapply() guarantees a character vector regardless of the input size
p2022_merged_toks <- corpus(
  vapply(p2022_toks_cleaned, paste, character(1), collapse = " ")
)
c2022_merged_toks <- corpus(
  vapply(c2022_toks_cleaned, paste, character(1), collapse = " ")
)

Now we can create sentence tokens for both corpora.

# Sentence-level tokens for both corpora: every "token" is one sentence of the
# merged, hyphen-normalised text.
p2022_sentences <- tokens(
  p2022_merged_toks,
  what = "sentence",
  remove_punct = FALSE,
  remove_symbols = TRUE,
  remove_numbers = TRUE,
  remove_url = TRUE,
  remove_separators = TRUE
)

c2022_sentences <- tokens(
  c2022_merged_toks,
  what = "sentence",
  remove_punct = FALSE,
  remove_symbols = TRUE,
  remove_numbers = TRUE,
  remove_url = TRUE,
  remove_separators = TRUE
)

2.2 Key Word In Context Retrieval

# For every compound, retrieve each sentence that matches it together with up
# to 5 sentences before and after it, and stack the results per corpus.
# The tokens are sentences, so the compound is matched as a regular expression
# inside the sentence text.

### DO FOR p2022 ###
kwiclist_sent_pro <- list()

# for each compound word
for (word in compounds) {
  # retrieve sentences before/after keyword
  context_pro_sent <- kwic(p2022_sentences, word, valuetype = "regex", window = 5)
  kwiclist_sent_pro[[word]] <- context_pro_sent # save to list, keyed by compound
}

# combine the per-compound results into the final object
kwic_pro_sent.df <- do.call(rbind, kwiclist_sent_pro)

### DO FOR c2022 ###
kwiclist_sent_con <- list()

# for each compound word
for (word in compounds) {
  # retrieve sentences before/after keyword
  context_con_sent <- kwic(c2022_sentences, word, valuetype = "regex", window = 5)
  kwiclist_sent_con[[word]] <- context_con_sent # save to list, keyed by compound
}

# combine the per-compound results into the final object
kwic_con_sent.df <- do.call(rbind, kwiclist_sent_con)

kwic_pro_sent.df
Keyword-in-context with 803 matches.
kwic_con_sent.df
Keyword-in-context with 1,946 matches.
[ reached max_nrow ... 946 more matches ]

2.3 Export Concordances

# export the sentence-level concordances of both corpora (without row names)
write.csv(x = kwic_pro_sent.df, file = "../output/pro_context_new.csv", row.names = FALSE)
write.csv(x = kwic_con_sent.df, file = "../output/con_context_new.csv", row.names = FALSE)

3. Term Frequencies

Additionally, we compute the term frequencies of each compound word and the corresponding TF-IDF score, since both corpora have a different size and we want to explore the relevance of each term.

Create a function to normalize the TF-IDF scores

# Min/max normalization: rescales x linearly to the interval [0, 1]
# (the minimum maps to 0, the maximum to 1).
#   x      numeric vector (names, if any, are preserved)
#   na.rm  if TRUE (default), NAs are ignored when determining the range and
#          stay NA in the result; if FALSE, any NA makes the whole result NA
# Returns a numeric vector of the same length as x. If all values are equal,
# the range is zero and the result is NaN.
normalize <- function(x, na.rm = TRUE) {
  rng <- range(x, na.rm = na.rm)
  (x - rng[1]) / (rng[2] - rng[1])
}
# Rebuild one text per document from the preprocessed tokens;
# vapply() guarantees a character vector regardless of the input size
p2022_cleaned <- corpus(vapply(sp_p2022_tokens, paste, character(1), collapse = " "))
c2022_cleaned <- corpus(vapply(sp_c2022_tokens, paste, character(1), collapse = " "))

# label every document with its group
p2022_cleaned$group <- "activists"
c2022_cleaned$group <- "skeptics"

# combine both corpora
complete <- p2022_cleaned + c2022_cleaned

# create dfm with frequencies per group;
# the corpus is tokenized explicitly because dfm() on a corpus is deprecated
dfm_complete_freq <- tokens(complete) %>%
  dfm() %>%
  dfm_keep(pattern = compounds) %>% # only keep compound words
  dfm_group(groups = group) # keep groups "activists" and "skeptics"
Warnung: 'dfm.corpus()' is deprecated. Use 'tokens()' first.
# convert dfm to a data frame and transpose it
dfm_complete_df <- dfm_complete_freq %>%
  convert(to = "data.frame") %>%
  t()

# comparison word cloud of both groups
set.seed(132) # set seed for reproducibility
textplot_wordcloud(dfm_complete_freq, comparison = TRUE, max_words = 250)

# export to the same relative output directory as the other csv exports,
# instead of an absolute path that only exists on one machine
write.csv(dfm_complete_df, "../output/tf_complete1.csv", row.names = TRUE)

And compute TF-IDF of the DFMs

# TF-IDF per corpus: weight the full dfm, keep only the compound words, take
# the (at most 300) top features and min/max-normalize their scores. Both
# tables are then merged on the term and exported.

### FOR C2022
dfm_c2022 <- dfm(sp_c2022_tokens) %>%
  dfm_tfidf() %>%
  dfm_keep(pattern = compounds)

c2022_tfidf <- normalize(topfeatures(dfm_c2022, n = 300))

# convert the named score vector into a data frame, highest score first
top_c2022_norm <- data.frame(Term = names(c2022_tfidf), Freq = c2022_tfidf, row.names = NULL) %>%
  dplyr::arrange(dplyr::desc(Freq))

### FOR P2022
dfm_p2022 <- dfm(sp_p2022_tokens) %>%
  dfm_tfidf() %>%
  dfm_keep(pattern = compounds)

p2022_tfidf <- normalize(topfeatures(dfm_p2022, n = 300))

# convert the named score vector into a data frame, highest score first
top_p2022_norm <- data.frame(Term = names(p2022_tfidf), Freq = p2022_tfidf, row.names = NULL) %>%
  dplyr::arrange(dplyr::desc(Freq))

# change column names to be able to merge both data frames
colnames(top_p2022_norm)[2] <- "Freq_P2022"
colnames(top_c2022_norm)[2] <- "Freq_C2022"

# full outer join: terms found in only one corpus get NA for the other
df_merge <- merge(top_c2022_norm, top_p2022_norm, by = "Term", all = TRUE)

# export to the same relative output directory as the other csv exports,
# instead of an absolute path that only exists on one machine
write.csv(df_merge, "../output/tfidf_complete.csv", row.names = TRUE)

Plot TF-IDF Scores

# Per-feature totals of the TF-IDF-weighted compound dfms
# (force = TRUE is passed because the dfms are weighted)
freqs_pro <- textstat_frequency(dfm_p2022, force = TRUE)
freqs_con <- textstat_frequency(dfm_c2022, force = TRUE)

# title-case the terms for display
freqs_pro$feature <- str_to_title(freqs_pro$feature)
freqs_con$feature <- str_to_title(freqs_con$feature)

# min/max-normalize the scores within each corpus
freqs_pro$normalize <- round(normalize(freqs_pro$frequency), 3)
freqs_con$normalize <- round(normalize(freqs_con$frequency), 3)

# keep only the term and its normalized score
freqs.act <- as.data.frame(freqs_pro) %>% dplyr::select(feature, normalize)
freqs.scept <- as.data.frame(freqs_con) %>% dplyr::select(feature, normalize)

# Join on the term (normalize.x = activists, normalize.y = sceptics), keep the
# first 50 rows and fix the plotting order via the factor levels. Terms missing
# from the sceptics table get NA and are dropped by ggplot with a warning.
# dplyr:: prefixes guard against the plyr versions that mask these verbs.
freqs <- dplyr::left_join(freqs.act, freqs.scept, by = "feature") %>%
  head(50) %>%
  dplyr::arrange(normalize.x) %>%
  dplyr::mutate(feature = factor(feature, feature))

# create plot: one point per group and term, connected by a grey segment
plot8 <- ggplot(freqs) +
  geom_segment(aes(x = feature, xend = feature, y = normalize.x, yend = normalize.y), color = "grey") +
  geom_point(aes(x = feature, y = normalize.x, colour = "Activists"), size = 3) +
  geom_point(aes(x = feature, y = normalize.y, colour = "Sceptics"), size = 3) +
  ggtitle("Comparison 'Klima' TF-IDF Scores per Group") +
  xlab("") + ylab("TF-IDF") +
  coord_flip()

plot8 + labs(colour = "Group")
Warnung: Removed 20 rows containing missing values (geom_segment).
Warnung: Removed 20 rows containing missing values (geom_point).
#ggsave("/Users/anna/Documents/uni/thesis/plots/comparison_klima_freqs_lemma.png", dpi=300, dev='png', height=6, width=12, units="in")
# Saves the most recently displayed plot. `device` is named in full rather than
# relying on partial argument matching of `dev`.
# NOTE(review): absolute, machine-specific path - consider a relative path.
ggsave("/Users/anna/Documents/uni/thesis/plots/comparison_tfidf.png", dpi = 300, device = "png", height = 10, width = 15, units = "in")
Warnung: Removed 20 rows containing missing values (geom_segment).
Warnung: Removed 20 rows containing missing values (geom_point).

TO DELETE!!!!

Plot TF-IDF Scores

# Variant of the TF-IDF comparison plot: TF-IDF is computed on the full dfms
# and the compound list is applied afterwards, before normalization.
# NOTE: this assigns c2022_tfidf, p2022_tfidf, freqs_pro, freqs_con, freqs.act,
# freqs.scept, freqs and plot8.

c2022_dfm <- dfm(sp_c2022_tokens)
c2022_tfidf <- dfm_tfidf(c2022_dfm)

p2022_dfm <- dfm(sp_p2022_tokens)
p2022_tfidf <- dfm_tfidf(p2022_dfm)

# per-feature totals of the TF-IDF-weighted dfms
# (force = TRUE is passed because the dfms are weighted)
freqs_pro <- textstat_frequency(p2022_tfidf, force = TRUE)
freqs_con <- textstat_frequency(c2022_tfidf, force = TRUE)

# retrieve only words that are contained in our final compound list
freqs_pro_subset <- freqs_pro[freqs_pro$feature %in% compounds, ]
freqs_pro_subset$feature <- str_to_title(freqs_pro_subset$feature)

freqs_con_subset <- freqs_con[freqs_con$feature %in% compounds, ]
freqs_con_subset$feature <- str_to_title(freqs_con_subset$feature)

# min/max-normalize the scores within each subset
freqs_pro_subset$normalize <- round(normalize(freqs_pro_subset$frequency), 3)
freqs_con_subset$normalize <- round(normalize(freqs_con_subset$frequency), 3)

# keep only the term and its normalized score
freqs.act <- as.data.frame(freqs_pro_subset) %>% dplyr::select(feature, normalize)
freqs.scept <- as.data.frame(freqs_con_subset) %>% dplyr::select(feature, normalize)

# Join on the term (normalize.x = activists, normalize.y = sceptics), keep the
# first 30 rows and fix the plotting order via the factor levels.
# dplyr:: prefixes guard against the plyr versions that mask these verbs.
freqs <- dplyr::left_join(freqs.act, freqs.scept, by = "feature") %>%
  head(30) %>%
  dplyr::arrange(normalize.x) %>%
  dplyr::mutate(feature = factor(feature, feature))

# create plot: one point per group and term, connected by a grey segment
plot8 <- ggplot(freqs) +
  geom_segment(aes(x = feature, xend = feature, y = normalize.x, yend = normalize.y), color = "grey") +
  geom_point(aes(x = feature, y = normalize.x, colour = "Activists"), size = 3) +
  geom_point(aes(x = feature, y = normalize.y, colour = "Sceptics"), size = 3) +
  ggtitle("Comparison 'Klima' TF-IDF Scores per Group") +
  xlab("") + ylab("TF-IDF") +
  coord_flip()

plot8 + labs(colour = "Group")
Warnung: Removed 8 rows containing missing values (geom_segment).
Warnung: Removed 8 rows containing missing values (geom_point).
# Saves the most recently displayed plot. `device` is named in full rather than
# relying on partial argument matching of `dev`.
# NOTE(review): absolute, machine-specific path - consider a relative path.
ggsave("/Users/anna/Documents/uni/thesis/plots/comparison_klima_freqs_lemma.png", dpi = 300, device = "png", height = 6, width = 12, units = "in")
Warnung: Removed 8 rows containing missing values (geom_segment).
Warnung: Removed 8 rows containing missing values (geom_point).

TO DELETE

# Spot checks for single compounds.
# c2022_merged_toks is a corpus, so it is tokenized first; kwic() expects tokens.
kwic(tokens(c2022_merged_toks), pattern = "klimaanbeter", window = 1, valuetype = "regex")
# sentence-level lookup: returns the matching sentence with one neighbour each side
kwic(c2022_sentences, pattern = "klimabank", window = 1, valuetype = "regex")

1.2 Apply to all Glossary Terms

Now, we will retrieve the collocations for each compound word. The collocations are then saved to a data frame, one for each corpus, and exported to a csv file, such that we can also use the data in Python.

# Gather the raw window-1 KWIC matches of every compound into one data frame
# per corpus.

# Empty, typed skeleton of the result. `pre` is character like `post`, so the
# column has the right type even when nothing matches.
empty_collocations <- data.frame(
  docname = character(),
  from = integer(),
  to = integer(),
  pre = character(),
  keyword = character(),
  post = character(),
  pattern = factor()
)

# Look up every pattern in `toks` (fixed matching, one token of context on each
# side) and stack all matches beneath `template` in a single rbind() call,
# instead of growing the data frame inside a loop.
#   toks      tokens object to search
#   patterns  character vector of keywords
#   template  zero-row data frame defining the result columns
# Returns a data frame with one row per match.
collect_collocations <- function(toks, patterns, template) {
  matches <- lapply(patterns, function(p) {
    as_tibble(kwic(toks, pattern = p, window = 1, valuetype = "fixed"))
  })
  do.call(rbind, c(list(template), unname(matches)))
}

### C2022
collocations_con <- collect_collocations(sp_c2022_tokens, compounds, empty_collocations)

### P2022
collocations_pro <- collect_collocations(sp_p2022_tokens, compounds, empty_collocations)

Please run the following lines to save the output to a csv file.

#write.csv(collocations_con, "../output/collocations_con.csv")
#write.csv(collocations_pro, "../output/collocations_pro.csv")

TO REPLACE COMPOUND FORMS BY THEIR LEMMA

# Replace the listed compound forms by their lemma in both corpora.
# Patterns and replacements are paired by position and matched as fixed strings.
lemma_from <- c("klimaglaubenslehr", "klimakarawan", "klimazeug", "klimawendehal")
lemma_to <- c("klimaglaubenslehre", "klimakarawane", "klimazeugs", "klimawendehals")

sp_c2022_tokens <- tokens_replace(
  sp_c2022_tokens,
  pattern = lemma_from,
  replacement = lemma_to,
  valuetype = "fixed"
)

sp_p2022_tokens <- tokens_replace(
  sp_p2022_tokens,
  pattern = lemma_from,
  replacement = lemma_to,
  valuetype = "fixed"
)




# For each comma-separated word-form string, replace all of its forms in the
# tokens by the original (lemma) recorded in compound_df.
# NOTE(review): requires unlist_forms(), compound_df, pro2000_tokens and
# contra2000_tokens to be defined before this runs.
for (word_form in c("glaubenslehre")) {
  word <- unlist_forms(word_form) # split into individual forms

  # rows of compound_df whose compound_forms match the first form (regex match;
  # base grepl() is used so that data.table's %like% is not needed)
  hits <- grepl(word[[1]], compound_df$compound_forms)
  if (!any(hits)) {
    stop("no entry in compound_df matches '", word[[1]], "'", call. = FALSE)
  }
  original <- compound_df$original[hits][[1]]
  lemma <- rep(original, length(word))

  # replace string in tokens with lemma form (for pro2000 and contra2000)
  pro2000_tokens <- tokens_replace(pro2000_tokens, word, lemma, valuetype = "fixed")
  contra2000_tokens <- tokens_replace(contra2000_tokens, word, lemma, valuetype = "fixed")
}
# Split comma-separated word-form strings into individual word forms.
#   word  character vector; each element may hold several forms separated by ","
# Returns one flat character vector containing every form, with all spaces
# removed.
unlist_forms <- function(word) {
  pieces <- strsplit(word, ",")
  gsub(" ", "", unlist(pieces))
}

# flatten the compound_forms column into a single vector of all word forms
compound_forms <- compound_df$compound_forms %>% unlist_forms()



#for (word in compound_forms){
 # print(word)
#}

compound_forms
LS0tCnRpdGxlOiAiVGhlc2lzOiBDb3JwdXMtQmFzZWQgTWV0aG9kcyIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKIyAwLiBSZXF1aXJlbWVudHMKTm90ZTogUGxlYXNlIGxvYWQgdGhlIHdvcmtzcGFjZSBpbiB0aGUgZGlyZWN0b3J5IGBpbXBsZW1lbnRhdGlvbi9SL3dvcmtzcGFjZS9wcmVwcm9jZXNzaW5nLlJEYXRhYCB0byBydW4gdGhlIGZvbGxvd2luZyBjb2RlIGFuZCByZS11c2UgdGhlIHByZXZpb3VzbHkgY3JlYXRlZCB2YXJpYWJsZXMuIEZ1cnRoZXJtb3JlLCB0aGUgZm9sbG93aW5nIGxpYnJhcmllcyBtdXN0IGJlIGluc3RhbGxlZCBhbmQgbG9hZGVkOgpgYGB7ciBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQojIGludGFsbCBuZWNlc3NhcnkgcGFja2FnZXMKI2luc3RhbGwucGFja2FnZXMoInF1YW50ZWRhIikKI2luc3RhbGwucGFja2FnZXMoInJlYWR0ZXh0IikKI2luc3RhbGwucGFja2FnZXMoInRpZHl2ZXJzZSIpCiNpbnN0YWxsLnBhY2thZ2VzKCJxdWFudGVkYS50ZXh0c3RhdHMiKQojaW5zdGFsbC5wYWNrYWdlcygicXVhbnRlZGEudGV4dHBsb3RzIikKI2luc3RhbGwucGFja2FnZXMoImRhdGEudGFibGUiKQojaW5zdGFsbC5wYWNrYWdlcygic3RyaW5nciIpCiNpbnN0YWxsLnBhY2thZ2VzKCJzcGFjeXIiKQojaW5zdGFsbC5wYWNrYWdlcygidGV4dGNhdCIpCiNpbnN0YWxsLnBhY2thZ2VzKCJwbHlyIikKCiMgbG9hZCBsaWJyYXJpZXMKbGlicmFyeShxdWFudGVkYSkKbGlicmFyeShyZWFkdGV4dCkKbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkocXVhbnRlZGEudGV4dHBsb3RzKQpsaWJyYXJ5KHF1YW50ZWRhLnRleHRzdGF0cykKbGlicmFyeShwbHlyKQpsaWJyYXJ5KGRwbHlyKQojbGlicmFyeShzdHJpbmdyKQojbGlicmFyeShkYXRhLnRhYmxlKQojbGlicmFyeSh0ZXh0Y2F0KQpgYGAKCiMgMS4gQ29sbG9jYXRpb25zClRvIHJldHJpZXZlIHRoZSBkaXJlY3QgY29sbG9jYXRpb25zLCBpLmUuIG9uZSB0b2tlbiB0byB0aGUgbGVmdCBvciByaWdodCBvZiB0aGUgY29tcG91bmQgd29yZCwgd2UgdXNlIHRoZSBga3dpY2AgZnVuY3Rpb24gb2ZmZXJlZCBieSBgcXVhbnRlZGFgLiBIZXJlIHdlIGNhbiBjaG9vc2UgYSB3aW5kb3cgb2YgMSB0byBtYWtlIHN1cmUgd2Ugb2J0YWluIHRoZSBjb3JyZWN0IG51bWJlciBvZiBjb2xsb2NhdGlvbnMuCgojIyAxLjEgRmlyc3QgTG9vawpMZXQncyBoYXZlIGEgZmlyc3QgbG9vayBhdCB0aGUgY29sbG9jYXRpb25zIGZvciB0aGUgZXhhbXBsZSAiS2xpbWFsZXVnbmVyIiAoZW46ICJjbGltYXRlIGRlbmllciIpLiBXZSBhcmUgZ29pbmcgdG8gcmV0cmlldmUgdGhlIGNvbGxvY2F0aW9ucyB0byB0aGUgbGVmdCAoYHByZWApIGFuZCB0byB0aGUgcmlnaHQgKGBwb3N0YCkgb2YgdGhlIGtleSB3b3JkIGFuZCBjb3VudCB0aGVpciBvY2N1cnJlbmNlcy4gVGhlbiwgd2Ugd2lsbCBvdXRwdXQgdGhlIFRvcC01IGNvbGxvY2F0aW9ucyBmb3IgZWFjaCBjYXRlZ29yeSwgaS5lLiBgcHJl
YCBhbmQgYHBvc3RgLiAKYGBge3J9CiMgYXBwbHkga2V5d29yZC1pbi1jb250ZXh0IGZ1bmN0aW9uIGZvciBnaXZlbiB3b3JkCndvcmQgPSAia2xpbWFsZXVnbmVyIgoKIyB0byBDMjAyMgprd2ljX2NvbiA8LSBrd2ljKHNwX2MyMDIyX3Rva2VucywgcGF0dGVybj13b3JkLCB3aW5kb3c9MSwgdmFsdWV0eXBlPSJmaXhlZCIpICU+JQogIGFzX3RpYmJsZSgpCgojIHRvIFAyMDIyCmt3aWNfcHJvIDwtIGt3aWMoc3BfcDIwMjJfdG9rZW5zLCBwYXR0ZXJuPXdvcmQsIHdpbmRvdz0xLCB2YWx1ZXR5cGU9ImZpeGVkIikgJT4lCiAgYXNfdGliYmxlKCkKYGBgCgoKTGV0J3Mgc2hvdyB0aGUgVG9wLTUgZm9yIHRoZSBDMjAyMiBjb3JwdXM6CmBgYHtyfQprd2ljX2NvbiAlPiUKICBkcGx5cjo6Y291bnQocHJlKSAlPiUKICBhcnJhbmdlKGRlc2MobikpICU+JQogIGhlYWQobj01KQoKa3dpY19jb24gJT4lCiAgZHBseXI6OiBjb3VudChwb3N0KSAlPiUKICBhcnJhbmdlKGRlc2MobikpICU+JQogIGhlYWQobj01KSAKYGBgCgpBbmQgdGhlIFRvcC01IGZvciB0aGUgUDIwMjIgY29ycHVzOgpgYGB7cn0Ka3dpY19wcm8gJT4lCiBkcGx5cjo6Y291bnQocHJlKSAlPiUKICBhcnJhbmdlKGRlc2MobikpICU+JQogIGhlYWQobj01KQoKa3dpY19wcm8gJT4lCiBkcGx5cjo6Y291bnQocG9zdCkgJT4lCiAgYXJyYW5nZShkZXNjKG4pKSAlPiUKICBoZWFkKG49NSkKYGBgCgojIyAxLjIgQXBwbHkgdG8gYWxsIEdsb3NzYXJ5IFRlcm1zCk5vdyB3ZSBzZWVrIHRvIGNyZWF0ZSB0YWJsZXMgdGhhdCBjb250YWluIHRoZSB0b3AgNSBgcHJlYCBhbmQgYHBvc3RgIGNvbGxvY2F0aW9ucyBmb3IgZWFjaCBvZiBvdXIgY29tcG91bmQgd29yZHMuIApGaXJzdGx5LCB3ZSBjcmVhdGUgYSB0YWJsZSBmb3IgdGhlIGNvbGxvY2F0aW9ucyB3ZSBjYW4gb2J0YWluIGZyb20gUDIwMjIKYGBge3J9CiMgZm9yIGVhY2ggY29tcG91bmQsIGdldCBsaXN0IG9mIHRvcCA1IGNvbGxvY2F0aW9ucwojIGluaXRpYXRlIGVtcHR5IGRhdGEgZnJhbWUgCnByb19jb2xsczEwID0gZGF0YS5mcmFtZSgpCgojIGZvciBlYWNoIGNvbXBvdW5kCmZvciAod29yZCBpbiBjb21wb3VuZHMpewoKICAjIGdldCBjb2xsb2NhdGlvbnMKICBrd2ljX3BybyA8LSBrd2ljKHNwX3AyMDIyX3Rva2VucywgcGF0dGVybj13b3JkLCB3aW5kb3c9MSwgdmFsdWV0eXBlPSJmaXhlZCIpICU+JQogICAgYXNfdGliYmxlKCkKICBrZXl3b3JkIDwtIHdvcmQgCiAgCiAgIyByZXRyaWV2ZSB0b3A1IHByZWNlZGluZyBjb2xsb2NhdGlvbnMKICBwcm9fcHJlIDwtIGt3aWNfcHJvICU+JQogICAgZHBseXI6OmNvdW50KHByZSkgJT4lCiAgICBhcnJhbmdlKGRlc2MobikpICU+JQogICAgaGVhZChuPTUpCgogICMgcmV0cmlldmUgdG9wNSBmb2xsb3dpbmcgY29sbG9jYXRpb25zCiAgcHJvX3Bvc3QgPC0ga3dpY19wcm8gJT4lCiAgICBkcGx5cjo6Y291bnQocG9zdCkgJT4lCiAgICBhcnJhbmdlKGRlc2MobikpICU+JQogICAgaGVhZChu
PTUpCgogICMgbm9ybWFsaXplIGRhdGEgZnJhbWVzIHdpdGggdG9wNSBjb2xsb2NhdGlvbnMgCiAgcHJvX3ByZSRrZXl3b3JkIDwtIGtleXdvcmQKICBwcm9fcHJlJHRhZyA8LSAicHJlIgogIG5hbWVzKHByb19wcmUpW25hbWVzKHByb19wcmUpID09ICdwcmUnXSA8LSAid29yZCIKCiAgcHJvX3Bvc3Qka2V5d29yZCA8LSBrZXl3b3JkCiAgcHJvX3Bvc3QkdGFnIDwtICJwb3N0IgogIG5hbWVzKHByb19wb3N0KVtuYW1lcyhwcm9fcG9zdCkgPT0gJ3Bvc3QnXSA8LSAid29yZCIKICAKICBwcm9fY29sbHMxMCA8LSByYmluZChwcm9fY29sbHMxMCwgcHJvX3ByZSkKICBwcm9fY29sbHMxMCA8LSByYmluZChwcm9fY29sbHMxMCwgcHJvX3Bvc3QpfQpgYGAKCk1vc3Qgb2YgdGhlIGNvbGxvY2F0aW9ucyBvbmx5IG9jY3VyIGV4YWN0bHkgb25jZS4gU2luY2UgdGhpcyBpcyBub3QgdmVyeSBpbmZvcm1hdGl2ZSBmb3IgdXMsIHdlIHJlbW92ZSBhbGwgdGhlIGNvbGxvY2F0aW9ucyB3aXRoIGEgY291bnQgb2YgZXhhY3RseSAxLiBBbHNvLCB3ZSB3YW50IHRvIHJlbW92ZSBub2lzZSwgaS5lLiBlbXB0eSBzdHJpbmdzIGZyb20gdGhlIGNvbGxvY2F0aW9ucy4gCmBgYHtyfQojIG9ubHkga2VlcCBjb2xsb2NhdGlvbnMgdGhhdCBhcHBlYXIgbW9yZSB0aGFuIG9uY2UgCnRvcF9jb2xsc19wcm88LXByb19jb2xsczEwWyhwcm9fY29sbHMxMCRuID4gMSksXQoKIyByZW1vdmUgZW1wdHkgc3RyaW5ncyAKdG9wX2NvbGxzX3BybzwtdG9wX2NvbGxzX3Byb1sodG9wX2NvbGxzX3BybyR3b3JkID4gIiAiKSxdCmBgYAoKQW5kIHNhdmUgdGhlIHRhYmxlIHRvIGEgY3N2IGZpbGUuCmBgYHtyfQojd3JpdGUuY3N2KHRvcF9jb2xsc19wcm8sICIuLi9vdXRwdXQvdG9wX2NvbGxvY2F0aW9uc19wcm8uY3N2IikKYGBgCgpUaGVuLCB3ZSBjcmVhdGUgdGhlIHNhbWUgdGFibGUgb2YgdGhlIHRvcCA1IGBwcmVgIGFuZCBgcG9zdGAgY29sbG9jYXRpb25zIGZvciB0aGUgQzIwMjIuCmBgYHtyfQojIGZvciBlYWNoIGNvbXBvdW5kLCBnZXQgbGlzdCBvZiB0b3AgNSBjb2xsb2NhdGlvbnMKIyBpbml0aWF0ZSBlbXB0eSBkYXRhIGZyYW1lIApjb25fY29sbHMxMCA9IGRhdGEuZnJhbWUoKQoKIyBmb3IgZWFjaCBjb21wb3VuZApmb3IgKHdvcmQgaW4gY29tcG91bmRzKXsKCiAgIyBnZXQgY29sbG9jYXRpb25zCiAga3dpY19jb24gPC0ga3dpYyhzcF9jMjAyMl90b2tlbnMsIHBhdHRlcm49d29yZCwgd2luZG93PTEsIHZhbHVldHlwZT0iZml4ZWQiKSAlPiUKICAgIGFzX3RpYmJsZSgpCiAgI2tleXdvcmQgPC0ga3dpY19wcm8ka2V5d29yZFtbMV1dCiAga2V5d29yZCA8LSB3b3JkIAogIAogICMgcmV0cmlldmUgdG9wNSBwcmVjZWRpbmcgY29sbG9jYXRpb25zCiAgY29uX3ByZSA8LSBrd2ljX2NvbiAlPiUKICAgIGRwbHlyOjpjb3VudChwcmUpICU+JQogICAgYXJyYW5nZShkZXNjKG4pKSAlPiUKICAgIGhlYWQobj01KQoKICAjIHJldHJpZXZlIHRvcDUgZm9sbG93aW5nIGNvbGxvY2F0
aW9ucwogIGNvbl9wb3N0IDwtIGt3aWNfY29uICU+JQogICAgZHBseXI6OmNvdW50KHBvc3QpICU+JQogICAgYXJyYW5nZShkZXNjKG4pKSAlPiUKICAgIGhlYWQobj01KQoKICAjIG5vcm1hbGl6ZSBkYXRhIGZyYW1lcyB3aXRoIHRvcDUgY29sbG9jYXRpb25zIAogIGNvbl9wcmUka2V5d29yZCA8LSBrZXl3b3JkCiAgY29uX3ByZSR0YWcgPC0gInByZSIKICBuYW1lcyhjb25fcHJlKVtuYW1lcyhjb25fcHJlKSA9PSAncHJlJ10gPC0gIndvcmQiCgogIGNvbl9wb3N0JGtleXdvcmQgPC0ga2V5d29yZAogIGNvbl9wb3N0JHRhZyA8LSAicG9zdCIKICBuYW1lcyhjb25fcG9zdClbbmFtZXMoY29uX3Bvc3QpID09ICdwb3N0J10gPC0gIndvcmQiCiAgCiAgY29uX2NvbGxzMTAgPC0gcmJpbmQoY29uX2NvbGxzMTAsIGNvbl9wcmUpCiAgY29uX2NvbGxzMTAgPC0gcmJpbmQoY29uX2NvbGxzMTAsIGNvbl9wb3N0KX0KYGBgCgpBbmQsIGp1c3QgbGlrZSBiZWZvcmUsIHdlIHJlbW92ZSB0aGUgY29sbG9jYXRpb25zIHRoYXQgYXBwZWFyZWQgb25seSBvbmNlIGluIHRoZSBjb3JwdXMgKGFuZCByZW1vdmUgbm9pc2UsIGkuZS4gZW1wdHkgc3RyaW5ncyBmcm9tIHRoZSBjb2xsb2NhdGlvbnMpLgpgYGB7cn0KIyBvbmx5IGtlZXAgY29sbG9jYXRpb25zIHRoYXQgYXBwZWFyIG1vcmUgdGhhbiBvbmNlIAp0b3BfY29sbHNfY29uPC1jb25fY29sbHMxMFsoY29uX2NvbGxzMTAkbiA+IDEpLF0KIyByZW1vdmUgZW1wdHkgc3RyaW5ncyAKdG9wX2NvbGxzX2NvbjwtdG9wX2NvbGxzX2NvblsodG9wX2NvbGxzX2NvbiR3b3JkID4gIiAiKSxdCmBgYAoKQW5kIHNhdmUgdGhlIGZpbmFsIHRhYmxlIHRvIGEgY3N2IGZpbGUuCmBgYHtyfQojd3JpdGUuY3N2KHRvcF9jb2xsc19jb24sICIuLi9vdXRwdXQvdG9wX2NvbGxvY2F0aW9uc19jb24uY3N2IikKYGBgCgojIDIuIENvbmNvcmRhbmNlcyAoS1dJQykKVG8gcmV0cmlldmUgdGhlIGNvbnRleHQgb2YgZWFjaCBjb21wb3VuZCB3b3JkLCB3ZSBleHRyYWN0IHRoZSBjb25jb3JkYW5jZXMgb24gYSBzZW50ZW5jZSBsZXZlbC4gVGhhdCBtZWFucywgd2UgZXh0cmFjdCBhIHdpbmRvdyBvZiA1IHNlbnRlbmNlcyB0byB0aGUgbGVmdCBhbmQgdG8gdGhlIHJpZ2h0IG9mIHRoZSBrZXl3b3JkIHNlbnRlbmNlLiBUbyBkbyB0aGlzLCB3ZSBtdXN0IHRva2VuaXplIG91ciBkYXRhIGJ5IHNlbnRlbmNlcywgaW5zdGVhZCBvZiB3b3Jkcy4KCiMjIDIuMSBQcmVwcm9jZXNzaW5nIApTaW5jZSB3ZSBjYW5ub3Qgbm9ybWFsaXplIHRoZSBkYXRhIHRoZSBzYW1lIHdheSB3aGVuIHdlIHRva2VuaXplIGl0IG9uIHNlbnRlbmNlLWxldmVsLCB3ZSBmaXJzdGx5IGNyZWF0ZSB3b3JkIHRva2VucyBmcm9tIHRoZSBjb3Jwb3JhLgpgYGB7cn0KIyBjcmVhdGUgd29yZCB0b2tlbnMgZm9yIFAyMDIyIGFuZCBDMjAyMgpwMjAyMl90b2tlbnMgPC0gdG9rZW5zKHBybzIwMjIsIHJlbW92ZV9wdW5jdCA9IEZBTFNFLCByZW1vdmVfc3ltYm9scyA9IFRSVUUs
CiAgICAgICAgICAgICAgICAgIHJlbW92ZV9udW1iZXJzID0gVFJVRSwgcmVtb3ZlX3VybCA9IFRSVUUsIHJlbW92ZV9zZXBhcmF0b3JzID0gVFJVRSkKCmMyMDIyX3Rva2VucyA8LSB0b2tlbnMoY29udHJhMjAyMiwgcmVtb3ZlX3B1bmN0ID0gRkFMU0UsIHJlbW92ZV9zeW1ib2xzID0gVFJVRSwKICAgICAgICAgICAgICAgICAgcmVtb3ZlX251bWJlcnMgPSBUUlVFLCByZW1vdmVfdXJsID0gVFJVRSwgcmVtb3ZlX3NlcGFyYXRvcnMgPSBUUlVFKQpgYGAKCgpUbyB0aGVzZSB0b2tlbnMsIHdlIGFwcGx5IGEgbm9ybWFsaXphdGlvbiBzdGVwIHdoZXJlIHdlIHJlbW92ZSBoeXBoZW5zIHdpdGhpbiB3b3Jkcywgc3VjaCBhcyAiS2xpbWEtU2tlcHRpa2VyIiB0byBjb252ZXJ0IGl0IHRvICJLbGltYXNrZXB0aWtlciIuIApgYGB7cn0KIyByZW1vdmUgaHlwaGVucyBmcm9tIHRva2VucwoKIyBjb252ZXJ0IHRvIHRva2VucwojcDIwMjJfdG9rc19jbGVhbmVkIDwtIGFzLnRva2VucyhwMjAyMl90b2tlbnMpCgojIHJlcGxhY2UgbXVsdGktdG9rZW4gc2VxdWVuY2VzIHdpdGggYSAiY29tcG91bmQiIHRva2VuIAojdG9rc19jb21wIDwtIHRva2Vuc19jb21wb3VuZChwMjAyMl90b2tzX2NsZWFuZWQsIHBocmFzZSgiKi0qIiksIGNvbmNhdGVuYXRvciA9IiIpCnRva3NfY29tcF9wIDwtIHRva2Vuc19jb21wb3VuZChwMjAyMl90b2tlbnMsIHBocmFzZSgiKi0qIiksIGNvbmNhdGVuYXRvciA9IiIpCgoKIyBnZXQgdG9rZW5zIGNvbnRhaW5pbmcgdGhlIGh5cGhlbgp0b2tzX2h5cGhlbmF0ZWRfcCA8LSBncmVwKCJcXHcrLVxcdysiLCB0eXBlcyh0b2tzX2NvbXBfcCksIHZhbHVlID0gVFJVRSkKCiMgcmVwbGFjZSB0aGUgaHlwaGVuYXRlZCB0b2tlbnMgYnkgdmVyc2lvbnMgd2l0aG91dCBoeXBoZW4KcDIwMjJfdG9rc19jbGVhbmVkIDwtIHRva2Vuc19yZXBsYWNlKHRva3NfY29tcF9wLCB0b2tzX2h5cGhlbmF0ZWRfcCwgZ3N1YigiLSIsICIiLCB0b2tzX2h5cGhlbmF0ZWRfcCkpCgojIGNvbnZlcnQgdG8gdG9rZW5zCiNjMjAyMl90b2tzX2NsZWFuZWQgPC0gYXMudG9rZW5zKGMyMDIyX3Rva2VucykKCiN0b2tzX2NvbXAgPC0gdG9rZW5zX2NvbXBvdW5kKGMyMDIyX3Rva3NfY2xlYW5lZCwgcGhyYXNlKCIqLSoiKSwgY29uY2F0ZW5hdG9yID0iIikKdG9rc19jb21wX2MgPC0gdG9rZW5zX2NvbXBvdW5kKGMyMDIyX3Rva2VucywgcGhyYXNlKCIqLSoiKSwgY29uY2F0ZW5hdG9yID0iIikKCiMgZ2V0IHRva2VucyBjb250YWluaW5nIHRoZSBoeXBoZW4KdG9rc19oeXBoZW5hdGVkX2MgPC0gZ3JlcCgiXFx3Ky1cXHcrIiwgdHlwZXModG9rc19jb21wX2MpLCB2YWx1ZSA9IFRSVUUpCgojIHJlcGxhY2UgdGhlIGh5cGhlbmF0ZWQgdG9rZW5zIGJ5IHZlcnNpb25zIHdpdGhvdXQgaHlwaGVuCmMyMDIyX3Rva3NfY2xlYW5lZCA8LSB0b2tlbnNfcmVwbGFjZSh0b2tzX2NvbXBfYywgdG9rc19oeXBoZW5hdGVkX2MsIGdzdWIoIi0iLCAiIiwgdG9rc19oeXBoZW5hdGVk
X2MpKQoKIyBtZXJnZSB0b2tlbnMgYmFjayBpbnRvIGNvcnB1cyBvYmplY3QgCnAyMDIyX21lcmdlZF90b2tzIDwtIGNvcnB1cyhzYXBwbHkocDIwMjJfdG9rc19jbGVhbmVkLCBwYXN0ZSwgY29sbGFwc2UgPSAiICIpKQpjMjAyMl9tZXJnZWRfdG9rcyA8LSBjb3JwdXMoc2FwcGx5KGMyMDIyX3Rva3NfY2xlYW5lZCwgcGFzdGUsIGNvbGxhcHNlID0gIiAiKSkKCmBgYAoKTm93IHdlIGNhbiBjcmVhdGUgc2VudGVuY2UgdG9rZW5zIGZvciBib3RoIGNvcnBvcmEuCmBgYHtyfQojIGNyZWF0ZSAic2VudGVuY2UiIHRva2VucyBmb3IgUDIwMjIgYW5kIEMyMDIyIGNvcnB1cwpwMjAyMl9zZW50ZW5jZXMgPC0gdG9rZW5zKHAyMDIyX21lcmdlZF90b2tzLCByZW1vdmVfcHVuY3QgPSBGQUxTRSwgcmVtb3ZlX3N5bWJvbHMgPSBUUlVFLAogICAgICAgICAgICAgICAgICByZW1vdmVfbnVtYmVycyA9IFRSVUUsIHJlbW92ZV91cmwgPSBUUlVFLCByZW1vdmVfc2VwYXJhdG9ycyA9IFRSVUUsIAogICAgICAgICAgICAgICAgICB3aGF0ID0gInNlbnRlbmNlIikKCmMyMDIyX3NlbnRlbmNlcyA8LSB0b2tlbnMoYzIwMjJfbWVyZ2VkX3Rva3MsIHJlbW92ZV9wdW5jdCA9IEZBTFNFLCByZW1vdmVfc3ltYm9scyA9IFRSVUUsCiAgICAgICAgICAgICAgICAgIHJlbW92ZV9udW1iZXJzID0gVFJVRSwgcmVtb3ZlX3VybCA9IFRSVUUsIHJlbW92ZV9zZXBhcmF0b3JzID0gVFJVRSwgCiAgICAgICAgICAgICAgICAgIHdoYXQgPSAic2VudGVuY2UiKQpgYGAKCiMjIDIuMiBLZXkgV29yZCBJbiBDb250ZXh0IFJldHJpZXZhbApgYGB7cn0KIyBjcmVhdGUgYSBkYXRhIGZyYW1lIGZyb20gdG9rZW5zIGNvbnRhaW5pbmcgNSBzZW50ZW5jZXMgYmVmb3JlIGFuZCBhZnRlciB0aGUga2V5d29yZCAKCiMjIyBETyBGT1IgcDIwMjIgIyMjIwprd2ljX3Byb19zZW50LmRmIDwtIGRhdGEuZnJhbWUobWF0cml4KG5jb2wgPSA3LCBucm93ID0gMCkpCmt3aWNsaXN0X3NlbnRfcHJvIDwtIGxpc3QoKQoKIyBmb3IgZWFjaCBjb21wb3VuZCB3b3JkCmZvciAod29yZCBpbiBjb21wb3VuZHMpCnsKICAjIHJldHJpZXZlIHNlbnRlbmNlcyBiZWZvcmUvYWZ0ZXIga2V5d29yZCAKICBjb250ZXh0X3Byb19zZW50IDwtIGt3aWMocDIwMjJfc2VudGVuY2VzLCB3b3JkLCB2YWx1ZXR5cGU9InJlZ2V4Iiwgd2luZG93PTUpCiAga3dpY2xpc3Rfc2VudF9wcm9bW3dvcmRdXSA8LSBjb250ZXh0X3Byb19zZW50ICMgc2F2ZSB0byBsaXN0IAp9Cgprd2ljX3Byb19zZW50LmRmID0gZG8uY2FsbChyYmluZCwga3dpY2xpc3Rfc2VudF9wcm8pICMgc2F2ZSB0byBmaW5hbCBkYXRhIGZyYW1lIAoKIyMjIERPIEZPUiBjMjAyMiAjIyMKa3dpY19jb25fc2VudC5kZiA8LSBkYXRhLmZyYW1lKG1hdHJpeChuY29sID0gNywgbnJvdyA9IDApKSAKa3dpY2xpc3Rfc2VudF9jb24gPC0gbGlzdCgpCgojIGZvciBlYWNoIGNvbXBvdW5kIHdvcmQKZm9yICh3b3JkIGluIGNvbXBvdW5kcykKewogICMgcmV0cmlldmUgc2Vu
dGVuY2VzIGJlZm9yZS9hZnRlciBrZXl3b3JkIAogIGNvbnRleHRfY29uX3NlbnQgPC0ga3dpYyhjMjAyMl9zZW50ZW5jZXMsIHdvcmQsIHZhbHVldHlwZT0icmVnZXgiLCB3aW5kb3c9NSkgCiAga3dpY2xpc3Rfc2VudF9jb25bW3dvcmRdXSA8LSBjb250ZXh0X2Nvbl9zZW50ICMgc2F2ZSB0byBsaXN0Cn0KCmt3aWNfY29uX3NlbnQuZGYgPSBkby5jYWxsKHJiaW5kLCBrd2ljbGlzdF9zZW50X2NvbikgIyBzYXZlIHRvIGZpbmFsIGRhdGEgZnJhbWUgCgprd2ljX3Byb19zZW50LmRmCmt3aWNfY29uX3NlbnQuZGYKYGBgCgojIDIuMiBFeHBvcnQgQ29uY29yZGFuY2VzCmBgYHtyfQojIHNhdmUgdG8gY3N2IGZpbGUgCiN3cml0ZS5jc3Yoa3dpY19wcm9fc2VudC5kZiwiLi4vb3V0cHV0L3Byb19jb250ZXh0X25ldy5jc3YiLCByb3cubmFtZXMgPSBGQUxTRSkKI3dyaXRlLmNzdihrd2ljX2Nvbl9zZW50LmRmLCIuLi9vdXRwdXQvY29uX2NvbnRleHRfbmV3LmNzdiIsIHJvdy5uYW1lcyA9IEZBTFNFKQpgYGAKCiMgMy4gVGVybSBGcmVxdWVuY2llcwpBZGRpdGlvbmFsbHksIHdlIGNvbXB1dGUgdGhlIHRlcm0gZnJlcXVlbmNpZXMgb2YgZWFjaCBjb21wb3VuZCB3b3JkIGFuZCB0aGUgYWNjb3JkaW5nIFRGLUlERiBzY29yZSwgc2luY2UgYm90aCBjb3Jwb3JhIGhhdmUgYSBkaWZmZXJlbnQgc2l6ZSBhbmQgd2Ugd2FudCB0byBleHBsb3JlIHRoZSByZWxldmFuY2Ugb2YgZWFjaCB0ZXJtLiAKCkNyZWF0ZSBhIGZ1bmN0aW9uIHRvIG5vcm1hbGl6ZSB0aGUgVEYtSURGIHNjb3JlcwpgYGB7cn0KIyBtaW4vbWF4IG5vcm1hbGl6YXRpb24gZnJvbSAtMSB0byAxLCByZWxhdGl2ZSB0byBkYXRhIGZyYW1lIHJlc3VsdHMKbm9ybWFsaXplIDwtIGZ1bmN0aW9uKHgsIG5hLnJtID0gVFJVRSl7CiAgcmV0dXJuKCh4IC0gbWluKHgpKSAvIChtYXgoeCktbWluKHgpKSl9CmBgYAoKCmBgYHtyfQojIHVzZSBsZW1tYXRpemVkIHRva2VucyB0byByZS1jcmVhdGUgYSBjb3JwdXMKIyB3ZSBuZWVkIHRoaXMgc3RlcCBmb3IgdGhlIGdyb3VwaW5nIG9mIHRoZSBmcmVxdWVuY2llcyAKcDIwMjJfY2xlYW5lZCA8LSBjb3JwdXMoc2FwcGx5KHNwX3AyMDIyX3Rva2VucywgcGFzdGUsIGNvbGxhcHNlID0gIiAiKSkKYzIwMjJfY2xlYW5lZCA8LSBjb3JwdXMoc2FwcGx5KHNwX2MyMDIyX3Rva2VucywgcGFzdGUsIGNvbGxhcHNlID0gIiAiKSkKCiMgY3JlYXRlIGdyb3VwcwpwMjAyMl9jbGVhbmVkJGdyb3VwIDwtICJhY3RpdmlzdHMiCmMyMDIyX2NsZWFuZWQkZ3JvdXAgPC0gInNrZXB0aWNzIgoKIyBjcmVhdGUgYSBjb3JwdXMgY29udGFpbmluZyBib3RoIHN1YmRpc2NvdXJzZXMKY29tcGxldGUgPSBwMjAyMl9jbGVhbmVkK2MyMDIyX2NsZWFuZWQKYGBgCgpgYGB7cn0KIyBjcmVhdGUgZGZtIHdpdGggZnJlcXVlbmNpZXMgcGVyIGdyb3VwCmRmbV9jb21wbGV0ZV9mcmVxIDwtIGRmbShjb21wbGV0ZSkgJT4lIAogICAgICAgICAgICAgICAgICAgICAgIGRmbV9rZWVwKHBhdHRl
cm4gPSBjb21wb3VuZHMpICU+JSAjIG9ubHkga2VlcCBjb21wb3VuZCB3b3JkcwogICAgICAgICAgICAgICAgICAgICAgIGRmbV9ncm91cChncm91cHMgPSBncm91cCkgIyBrZWVwIGdyb3VwcyAiYWN0aXZpc3RzIiBhbmQgInNrZXB0aWNzIgoKIyBjb252ZXJ0IGRmbSB0byBkYXRhIGZyYW1lIApkZm1fY29tcGxldGVfZGYgPC0gZGZtX2NvbXBsZXRlX2ZyZXEgJT4lIAogICAgICAgICAgICAgICAgICBjb252ZXJ0KHRvID0gImRhdGEuZnJhbWUiKSAlPiUKICAgICAgICAgICAgICAgICAgdCgpICMgdHJhbnNwb3NlIGRhdGEgZnJhbWUKCnNldC5zZWVkKDEzMikgIyBzZXQgc2VlZCBmb3IgcmVwcm9kdWNpYmlsaXR5CnRleHRwbG90X3dvcmRjbG91ZChkZm1fY29tcGxldGVfZnJlcSwgY29tcGFyaXNvbiA9IFRSVUUsIG1heF93b3JkcyA9IDI1MCkgIyBwbG90IHdvcmRjbG91ZAoKI3dyaXRlLmNzdihkZm1fY29tcGxldGVfZGYsIi9Vc2Vycy9hbm5hL0RvY3VtZW50cy91bmkvdGhlc2lzL2ltcGxlbWVudGF0aW9uL1Ivb3V0cHV0L3RmX2NvbXBsZXRlLmNzdiIsIHJvdy5uYW1lcyA9IFRSVUUpCmBgYApBbmQgY29tcHV0ZSBURi1JREYgb2YgdGhlIERGTXMKYGBge3J9CiMjIyBGT1IgQzIwMjIKIyBjcmVhdGUgZGZtIG9mIGxlbW1hdGl6ZWQgdG9rZW5zLCBvbmx5IGtlZXAgY29tcG91bmQgd29yZHMKI2RmbV9jMjAyMiA8LSBkZm0oc3BfYzIwMjJfdG9rZW5zKSAlPiUgZGZtX2tlZXAocGF0dGVybiA9IGNvbXBvdW5kcykKI2RmbV9jMjAyMl90ZmlkZiA8LSBkZm1fdGZpZGYoZGZtX2MyMDIyKSAjIGNvbXB1dGUgdGZpZGYgc2NvcmVzCiN0b3BfYzIwMjJfbm9ybSA8LSBub3JtYWxpemUodG9wZmVhdHVyZXMoZGZtX2MyMDIyLCBuPTMwMCkpICMgbm9ybWFsaXplIHNjb3JlcwoKZGZtX2MyMDIyIDwtIGRmbShzcF9jMjAyMl90b2tlbnMpICU+JSAKICAgICAgICAgICAgZGZtX3RmaWRmKCkgJT4lCiAgICAgICAgICAgIGRmbV9rZWVwKHBhdHRlcm4gPSBjb21wb3VuZHMpIAoKYzIwMjJfdGZpZGYgPC0gbm9ybWFsaXplKHRvcGZlYXR1cmVzKGRmbV9jMjAyMiwgbj0zMDApKQoKICAKICMgZGZtX2tlZXAocGF0dGVybiA9IGNvbXBvdW5kcykKI2RmbV9jMjAyMl90ZmlkZiA8LSBkZm1fdGZpZGYoZGZtX2MyMDIyKSAjIGNvbXB1dGUgdGZpZGYgc2NvcmVzCiN0b3BfYzIwMjJfbm9ybSA8LSBub3JtYWxpemUodG9wZmVhdHVyZXMoZGZtX2MyMDIyLCBuPTMwMCkpCgoKIyBjb252ZXJ0IGRmbSBpbnRvIGRhdGEgZnJhbWUKdG9wX2MyMDIyX25vcm0gPC0gZGF0YS5mcmFtZShUZXJtID0gbmFtZXMoYzIwMjJfdGZpZGYpLCBGcmVxID0gYzIwMjJfdGZpZGYsIHJvdy5uYW1lcyA9IE5VTEwpICU+JQogIGRwbHlyOjphcnJhbmdlKGRlc2MoRnJlcSkpCgojIyMgRk9SIFAyMDIyCiMgY3JlYXRlIGRmbSBvZiBsZW1tYXRpemVkIHRva2Vucywgb25seSBrZWVwIGNvbXBvdW5kIHdvcmRzCiNkZm1fcDIwMjIgPC0gZGZtKHNwX3AyMDIyX3Rva2VucykgJT4lIGRmbV9rZWVw
KHBhdHRlcm4gPSBjb21wb3VuZHMpIAojZGZtX3AyMDIyX3RmaWRmIDwtIGRmbV90ZmlkZihkZm1fcDIwMjIpICMgY29tcHV0ZSB0ZmlkZiBzY29yZXMKI3RvcF9wMjAyMl9ub3JtIDwtIG5vcm1hbGl6ZSh0b3BmZWF0dXJlcyhkZm1fcDIwMjIsIG49MzAwKSkgIyBub3JtYWxpemUgc2NvcmVzCgoKZGZtX3AyMDIyIDwtIGRmbShzcF9wMjAyMl90b2tlbnMpICU+JSAKICAgICAgICAgICAgZGZtX3RmaWRmKCkgJT4lCiAgICAgICAgICAgIGRmbV9rZWVwKHBhdHRlcm4gPSBjb21wb3VuZHMpIAoKcDIwMjJfdGZpZGYgPC0gbm9ybWFsaXplKHRvcGZlYXR1cmVzKGRmbV9wMjAyMiwgbj0zMDApKQoKIyBjb252ZXJ0IGRmbSBpbnRvIGRhdGEgZnJhbWUKdG9wX3AyMDIyX25vcm0gPC0gZGF0YS5mcmFtZShUZXJtID0gbmFtZXMocDIwMjJfdGZpZGYpLCBGcmVxID0gcDIwMjJfdGZpZGYsIHJvdy5uYW1lcyA9IE5VTEwpICU+JQogIGRwbHlyOjphcnJhbmdlKGRlc2MoRnJlcSkpCgojIGNoYW5nZSBjb2x1bW4gbmFtZXMgdG8gYmUgYWJsZSB0byBtZXJnZSBib3RoIGRhdGEgZnJhbWVzCmNvbG5hbWVzKHRvcF9wMjAyMl9ub3JtKVsyXSA8LSAiRnJlcV9QMjAyMiIKY29sbmFtZXModG9wX2MyMDIyX25vcm0pWzJdIDwtICJGcmVxX0MyMDIyIgoKIyBtZXJnZSBkYXRhIGZyYW1lcyAKZGZfbWVyZ2UgPC0gbWVyZ2UodG9wX2MyMDIyX25vcm0sdG9wX3AyMDIyX25vcm0sYnk9IlRlcm0iLCBhbGwueCA9IFRSVUUsIGFsbC55ID0gVFJVRSkKCiMgd3JpdGUgdG8gY3N2IGZpbGUKd3JpdGUuY3N2KGRmX21lcmdlLCIvVXNlcnMvYW5uYS9Eb2N1bWVudHMvdW5pL3RoZXNpcy9pbXBsZW1lbnRhdGlvbi9SL291dHB1dC90ZmlkZl9jb21wbGV0ZS5jc3YiLCByb3cubmFtZXMgPSBUUlVFKQpgYGAKCgojIyBQbG90IFRGLUlERiBTY29yZXMKYGBge3J9CiMgcmV0cmlldmUgZnJlcXVlbmN5IHRhYmxlIG9mIGRmbQpmcmVxc19wcm8gPC0gdGV4dHN0YXRfZnJlcXVlbmN5KGRmbV9wMjAyMiwgZm9yY2U9VFJVRSkKZnJlcXNfY29uIDwtIHRleHRzdGF0X2ZyZXF1ZW5jeShkZm1fYzIwMjIsIGZvcmNlPVRSVUUpCgojIGNhcGl0YWxpemUgZmlyc3QgbGV0dGVyIG9mIGNvbXBvdW5kCmZyZXFzX3BybyRmZWF0dXJlIDwtIHN0cl90b190aXRsZShmcmVxc19wcm8kZmVhdHVyZSkKZnJlcXNfY29uJGZlYXR1cmUgPC0gc3RyX3RvX3RpdGxlKGZyZXFzX2NvbiRmZWF0dXJlKQoKIyBhcHBseSBub3JtYWxpemF0aW9uCmZyZXFzX3BybyRub3JtYWxpemUgPSByb3VuZChub3JtYWxpemUoZnJlcXNfcHJvJGZyZXF1ZW5jeSksMykKZnJlcXNfY29uJG5vcm1hbGl6ZSA9IHJvdW5kKG5vcm1hbGl6ZShmcmVxc19jb24kZnJlcXVlbmN5KSwzKQoKIyBwbG90IGNvbXBhcmlzb24gb2YgYm90aCBncm91cHMKZnJlcXMuYWN0IDwtIGZpbHRlcihmcmVxc19wcm8pICU+JSBhcy5kYXRhLmZyYW1lKCkgJT4lIHNlbGVjdChmZWF0dXJlLCBub3JtYWxpemUpCmZyZXFzLnNjZXB0IDwtIGZpbHRlcihmcmVx
c19jb24pICU+JSBhcy5kYXRhLmZyYW1lKCkgJT4lIHNlbGVjdChmZWF0dXJlLCBub3JtYWxpemUpCmZyZXFzIDwtIGxlZnRfam9pbihmcmVxcy5hY3QsIGZyZXFzLnNjZXB0LCBieSA9ICJmZWF0dXJlIikgJT4lIGhlYWQoNTApICU+JSBhcnJhbmdlKG5vcm1hbGl6ZS54KSAlPiUgbXV0YXRlKGZlYXR1cmUgPSBmYWN0b3IoZmVhdHVyZSwgZmVhdHVyZSkpCgojIGNyZWF0ZSBwbG90CnBsb3Q4IDwtIGdncGxvdChmcmVxcykgKwogICAgZ2VvbV9zZWdtZW50KGFlcyh4PWZlYXR1cmUsIHhlbmQ9ZmVhdHVyZSwgeT1ub3JtYWxpemUueCwgeWVuZD1ub3JtYWxpemUueSksIGNvbG9yPSJncmV5IikgKwogICAgZ2VvbV9wb2ludChhZXMoeD1mZWF0dXJlLCB5PW5vcm1hbGl6ZS54LCBjb2xvdXI9IkFjdGl2aXN0cyIpLCBzaXplID0gMykgKwogICAgZ2VvbV9wb2ludChhZXMoeD1mZWF0dXJlLCB5PW5vcm1hbGl6ZS55LCBjb2xvdXI9IlNjZXB0aWNzIiksIHNpemUgPSAzKSArCiAgICBnZ3RpdGxlKCJDb21wYXJpc29uICdLbGltYScgVEYtSURGIFNjb3JlcyBwZXIgR3JvdXAiKSArIAogICAgeGxhYigiIikgKyB5bGFiKCJURi1JREYiKSArCiAgICBjb29yZF9mbGlwKCkKCnBsb3Q4K2xhYnMoY29sb3VyPSJHcm91cCIpCgojIHNhdmUgdG8gcG5nIAojZ2dzYXZlKCIvVXNlcnMvYW5uYS9Eb2N1bWVudHMvdW5pL3RoZXNpcy9wbG90cy9jb21wYXJpc29uX3RmaWRmLnBuZyIsIGRwaT0zMDAsIGRldj0ncG5nJywgaGVpZ2h0PTEwLCB3aWR0aD0xNSwgdW5pdHM9ImluIikKYGBgCiMgVE8gREVMRVRFISEhIQojIyBQbG90IFRGLUlERiBTY29yZXMKYGBge3J9CiMgY3JlYXRlIGEgc2FtcGxlIG9mIHRoZSBkZm0gd2l0aCBhbGwgd29yZHMgc3RhcnRpbmcgd2l0aCAia2xpbWEuLi4iIAoja2xpbWFfcDIwMDAgPC0gZGZtX3NlbGVjdChkZm1fcDIwMDBfbGVtbWEsIHBhdHRlcm49ImtsaW1hKiIpCiNrbGltYV9jMjAwMCA8LSBkZm1fc2VsZWN0KGRmbV9jMjAwMF9sZW1tYSwgcGF0dGVybj0ia2xpbWEqIikKCiMjIyMjCiNjMjAyMl9kZm0gPC0gZGZtKHNwX2MyMDIyX3Rva2VucykgCiNjMjAyMl90ZmlkZiA8LSBkZm1fdGZpZGYoYzIwMjJfZGZtKQoKI3AyMDIyX2RmbSA8LSBkZm0oc3BfcDIwMjJfdG9rZW5zKSAKI3AyMDIyX3RmaWRmIDwtIGRmbV90ZmlkZihwMjAyMl9kZm0pCiMjIyMjIwoKCiNkZm1fYzIwMjIgPC0gZGZtKHNwX2MyMDIyX3Rva2VucykgJT4lIGRmbV9rZWVwKHBhdHRlcm4gPSBjb21wb3VuZHMpCiNkZm1fYzIwMjJfdGZpZGYgPC0gZGZtX3RmaWRmKGRmbV9jMjAyMikgIyBjb21wdXRlIHRmaWRmIHNjb3JlcwojdG9wX2MyMDIyX25vcm0gPC0gbm9ybWFsaXplKHRvcGZlYXR1cmVzKGRmbV9jMjAyMiwgbj0zMDApKQoKIyBjYWxjdWxhdGUgdGZpZGYgZm9yICJrbGltYSIgd29yZHMKI3AyMDAwX3RmaWRmIDwtIGRmbV90ZmlkZihrbGltYV9wMjAwMCwgc2NoZW1lX3RmID0gInByb3AiLCBzY2hlbWVfZGYgPSAiaW52ZXJzZSIpCiNjMjAwMF90Zmlk
ZiA8LSBkZm1fdGZpZGYoa2xpbWFfYzIwMDAsIHNjaGVtZV90ZiA9ICJwcm9wIiwgc2NoZW1lX2RmID0gImludmVyc2UiKQoKIyByZXRyaWV2ZSBmcmVxdWVuY2llcyBmb3IgImtsaW1hIiB3b3JkcwpmcmVxc19wcm8gPC0gdGV4dHN0YXRfZnJlcXVlbmN5KHAyMDIyX3RmaWRmLCBmb3JjZT1UUlVFKQpmcmVxc19jb24gPC0gdGV4dHN0YXRfZnJlcXVlbmN5KGMyMDIyX3RmaWRmLCBmb3JjZT1UUlVFKQoKIyBhcHBseSBub3JtYWxpemF0aW9uCiNmcmVxc19wcm8kbm9ybWFsaXplID0gcm91bmQobm9ybWFsaXplKGZyZXFzX3BybyRmcmVxdWVuY3kpLCAzKQojZnJlcXNfY29uJG5vcm1hbGl6ZSA9IHJvdW5kKG5vcm1hbGl6ZShmcmVxc19jb24kZnJlcXVlbmN5KSwgMykKCiMgcmV0cmlldmUgb25seSB3b3JkcyB0aGF0IGFyZSBjb250YWluZWQgaW4gb3VyIGZpbmFsIGNvbXBvdW5kIGxpc3QgCmZyZXFzX3Byb19zdWJzZXQgPC0gZnJlcXNfcHJvW2ZyZXFzX3BybyRmZWF0dXJlICVpbiUgY29tcG91bmRzLCBdCmZyZXFzX3Byb19zdWJzZXQkZmVhdHVyZSA8LSBzdHJfdG9fdGl0bGUoZnJlcXNfcHJvX3N1YnNldCRmZWF0dXJlKQoKZnJlcXNfY29uX3N1YnNldCA8LSBmcmVxc19jb25bZnJlcXNfY29uJGZlYXR1cmUgJWluJSBjb21wb3VuZHMsIF0KZnJlcXNfY29uX3N1YnNldCRmZWF0dXJlIDwtIHN0cl90b190aXRsZShmcmVxc19jb25fc3Vic2V0JGZlYXR1cmUpCgoKZnJlcXNfcHJvX3N1YnNldCRub3JtYWxpemUgPSByb3VuZChub3JtYWxpemUoZnJlcXNfcHJvX3N1YnNldCRmcmVxdWVuY3kpLCAzKQpmcmVxc19jb25fc3Vic2V0JG5vcm1hbGl6ZSA9IHJvdW5kKG5vcm1hbGl6ZShmcmVxc19jb25fc3Vic2V0JGZyZXF1ZW5jeSksIDMpCgojIHBsb3QgY29tcGFyaXNvbiBvZiBib3RoIGdyb3VwcyAtPiBvbmx5IHdvcmRzIGZyb20gY29tcG91bmQgbGlzdCAKZnJlcXMuYWN0IDwtIGZpbHRlcihmcmVxc19wcm9fc3Vic2V0KSAlPiUgYXMuZGF0YS5mcmFtZSgpICU+JSBzZWxlY3QoZmVhdHVyZSwgbm9ybWFsaXplKQpmcmVxcy5zY2VwdCA8LSBmaWx0ZXIoZnJlcXNfY29uX3N1YnNldCkgJT4lIGFzLmRhdGEuZnJhbWUoKSAlPiUgc2VsZWN0KGZlYXR1cmUsIG5vcm1hbGl6ZSkKZnJlcXMgPC0gbGVmdF9qb2luKGZyZXFzLmFjdCwgZnJlcXMuc2NlcHQsIGJ5ID0gImZlYXR1cmUiKSAlPiUgaGVhZCgzMCkgJT4lIGFycmFuZ2Uobm9ybWFsaXplLngpICU+JSBtdXRhdGUoZmVhdHVyZSA9IGZhY3RvcihmZWF0dXJlLCBmZWF0dXJlKSkKCiMgY3JlYXRlIHBsb3QKcGxvdDggPC0gZ2dwbG90KGZyZXFzKSArCiAgICBnZW9tX3NlZ21lbnQoYWVzKHg9ZmVhdHVyZSwgeGVuZD1mZWF0dXJlLCB5PW5vcm1hbGl6ZS54LCB5ZW5kPW5vcm1hbGl6ZS55KSwgY29sb3I9ImdyZXkiKSArCiAgICBnZW9tX3BvaW50KGFlcyh4PWZlYXR1cmUsIHk9bm9ybWFsaXplLngsIGNvbG91cj0iQWN0aXZpc3RzIiksIHNpemUgPSAzKSArCiAgICBnZW9tX3BvaW50KGFlcyh4PWZl
YXR1cmUsIHk9bm9ybWFsaXplLnksIGNvbG91cj0iU2NlcHRpY3MiKSwgc2l6ZSA9IDMpICsKICAgIGdndGl0bGUoIkNvbXBhcmlzb24gJ0tsaW1hJyBURi1JREYgU2NvcmVzIHBlciBHcm91cCIpICsgCiAgICB4bGFiKCIiKSArIHlsYWIoIlRGLUlERiIpICsKICAgIGNvb3JkX2ZsaXAoKQoKcGxvdDgrbGFicyhjb2xvdXI9Ikdyb3VwIikKCmdnc2F2ZSgiL1VzZXJzL2FubmEvRG9jdW1lbnRzL3VuaS90aGVzaXMvcGxvdHMvY29tcGFyaXNvbl9rbGltYV9mcmVxc19sZW1tYS5wbmciLCBkcGk9MzAwLCBkZXY9J3BuZycsIGhlaWdodD02LCB3aWR0aD0xMiwgdW5pdHM9ImluIikKYGBgCgoKCiMgVE8gREVMRVRFCgoKYGBge3J9Cmt3aWMoYzIwMjJfbWVyZ2VkX3Rva3MsIHBhdHRlcm49ImtsaW1hYW5iZXRlciIsIHdpbmRvdz0xLCB2YWx1ZXR5cGU9InJlZ2V4IikKa3dpYyhjMjAyMl9zZW50ZW5jZXMsIHBhdHRlcm49ImtsaW1hYmFuayIsIHdpbmRvdz0xLCB2YWx1ZXR5cGU9InJlZ2V4IikKCmBgYAoKCiMjIDEuMiBBcHBseSB0byBhbGwgR2xvc3NhcnkgVGVybXMKTm93LCB3ZSB3aWxsIHJldHJpZXZlIHRoZSBjb2xsb2NhdGlvbnMgZm9yIGVhY2ggY29tcG91bmQgd29yZC4gVGhlIGNvbGxvY2F0aW9ucyBhcmUgdGhlbiBzYXZlZCB0byBhIGRhdGEgZnJhbWUsIG9uZSBmb3IgZWFjaCBjb3JwdXMsIGFuZCBleHBvcnRlZCB0byBhIGNzdiBmaWxlLCBzdWNoIHRoYXQgd2UgY2FuIGFsc28gdXNlIHRoZSBkYXRhIGluIFB5dGhvbgpgYGB7cn0KIyMjIEMyMDIyCiMgaW5pdGlhdGUgZW1wdHkgZGF0YSBmcmFtZSBmb3IgQzIwMjIKY29sbG9jYXRpb25zX2NvbiA9IGRhdGEuZnJhbWUoZG9jbmFtZT1jaGFyYWN0ZXIoKSwKICAgICAgICAgICAgICAgICBmcm9tPWludGVnZXIoKSwKICAgICAgICAgICAgICAgICB0bz1pbnRlZ2VyKCksCiAgICAgICAgICAgICAgICAgcHJlPWxvZ2ljYWwoKSwKICAgICAgICAgICAgICAgICBrZXl3b3JkPWNoYXJhY3RlcigpLAogICAgICAgICAgICAgICAgIHBvc3Q9Y2hhcmFjdGVyKCksCiAgICAgICAgICAgICAgICAgcGF0dGVybj1mYWN0b3IoKSkKCiMgZm9yIGVhY2ggY29tcG91bmQgd29yZCAKZm9yICh3b3JkIGluIGNvbXBvdW5kcyl7CiAgIyBpbml0aWF0ZSBlbXB0eSBkYXRhIGZyYW1lCiAgY29sbHMgPSBkYXRhLmZyYW1lKCkKICAjIGxvb2sgdXAgY29sbG9jYXRpb25zCiAgY29sbHMgPC0ga3dpYyhzcF9jMjAyMl90b2tlbnMsIHBhdHRlcm49d29yZCwgd2luZG93PTEsIHZhbHVldHlwZT0iZml4ZWQiKSAlPiUKICBhc190aWJibGUoKQogICMgc2F2ZSB0byBkYXRhIGZyYW1lIAogIGNvbGxvY2F0aW9uc19jb24gPC0gcmJpbmQoY29sbG9jYXRpb25zX2NvbiwgY29sbHMpfQoKIyMjIFAyMDIyCiMgaW5pdGlhdGUgZW1wdHkgZGF0YSBmcmFtZSBmb3IgUDIwMjIKY29sbG9jYXRpb25zX3BybyA9IGRhdGEuZnJhbWUoZG9jbmFtZT1jaGFyYWN0ZXIoKSwKICAgICAgICAgICAgICAgICBmcm9tPWludGVnZXIoKSwKICAg
ICAgICAgICAgICAgICB0bz1pbnRlZ2VyKCksCiAgICAgICAgICAgICAgICAgcHJlPWxvZ2ljYWwoKSwKICAgICAgICAgICAgICAgICBrZXl3b3JkPWNoYXJhY3RlcigpLAogICAgICAgICAgICAgICAgIHBvc3Q9Y2hhcmFjdGVyKCksCiAgICAgICAgICAgICAgICAgcGF0dGVybj1mYWN0b3IoKSkKCiMgZm9yIGVhY2ggY29tcG91bmQKZm9yICh3b3JkIGluIGNvbXBvdW5kcyl7CiAgIyBpbml0aWF0ZSBlbXB0eSBkYXRhIGZyYW1lCiAgY29sbHMgPSBkYXRhLmZyYW1lKCkKICAjIGxvb2sgdXAgY29sbG9jYXRpb25zCiAgY29sbHMgPC0ga3dpYyhzcF9wMjAyMl90b2tlbnMsIHBhdHRlcm49d29yZCwgd2luZG93PTEsIHZhbHVldHlwZT0iZml4ZWQiKSAlPiUKICBhc190aWJibGUoKQogICMgc2F2ZSB0byBkYXRhIGZyYW1lIAogIGNvbGxvY2F0aW9uc19wcm8gPC0gcmJpbmQoY29sbG9jYXRpb25zX3BybywgY29sbHMpfQpgYGAKClBsZWFzZSBydW4gdGhlIGZvbGxvd2luZyBsaW5lcyB0byBzYXZlIHRoZSBvdXRwdXQgdG8gYSBjc3YgZmlsZS4gCmBgYHtyfQojd3JpdGUuY3N2KGNvbGxvY2F0aW9uc19jb24sICIuLi9vdXRwdXQvY29sbG9jYXRpb25zX2Nvbi5jc3YiKQojd3JpdGUuY3N2KGNvbGxvY2F0aW9uc19wcm8sICIuLi9vdXRwdXQvY29sbG9jYXRpb25zX3Byby5jc3YiKQpgYGAKCgoKIyMjIFRPIFJFUExBQ0UgQ09NUE9VTkQgRk9STVMgQlkgVEhFSVIgTEVNTUEKYGBge3J9CiMgZm9yIGVhY2ggY29tcG91bmQKCnNwX2MyMDIyX3Rva2VucyA8LSB0b2tlbnNfcmVwbGFjZShzcF9jMjAyMl90b2tlbnMsIHBhdHRlcm49ImtsaW1hZ2xhdWJlbnNsZWhyIiwgcmVwbGFjZW1lbnQ9ImtsaW1hZ2xhdWJlbnNsZWhyZSIsIHZhbHVldHlwZSA9ICJmaXhlZCIpCnNwX2MyMDIyX3Rva2VucyA8LSB0b2tlbnNfcmVwbGFjZShzcF9jMjAyMl90b2tlbnMsIHBhdHRlcm49ImtsaW1ha2FyYXdhbiIsIHJlcGxhY2VtZW50PSJrbGltYWthcmF3YW5lIiwgdmFsdWV0eXBlID0gImZpeGVkIikKc3BfYzIwMjJfdG9rZW5zIDwtIHRva2Vuc19yZXBsYWNlKHNwX2MyMDIyX3Rva2VucywgcGF0dGVybj0ia2xpbWF6ZXVnIiwgcmVwbGFjZW1lbnQ9ImtsaW1hemV1Z3MiLCB2YWx1ZXR5cGUgPSAiZml4ZWQiKQpzcF9jMjAyMl90b2tlbnMgPC0gdG9rZW5zX3JlcGxhY2Uoc3BfYzIwMjJfdG9rZW5zLCBwYXR0ZXJuPSJrbGltYXdlbmRlaGFsIiwgcmVwbGFjZW1lbnQ9ImtsaW1hd2VuZGVoYWxzIiwgdmFsdWV0eXBlID0gImZpeGVkIikKCgpzcF9wMjAyMl90b2tlbnMgPC0gdG9rZW5zX3JlcGxhY2Uoc3BfcDIwMjJfdG9rZW5zLCBwYXR0ZXJuPSJrbGltYWdsYXViZW5zbGVociIsIHJlcGxhY2VtZW50PSJrbGltYWdsYXViZW5zbGVocmUiLCB2YWx1ZXR5cGUgPSAiZml4ZWQiKQpzcF9wMjAyMl90b2tlbnMgPC0gdG9rZW5zX3JlcGxhY2Uoc3BfcDIwMjJfdG9rZW5zLCBwYXR0ZXJuPSJrbGltYWthcmF3YW4iLCByZXBsYWNlbWVudD0ia2xpbWFrYXJhd2FuZSIsIHZhbHVl
dHlwZSA9ICJmaXhlZCIpCnNwX3AyMDIyX3Rva2VucyA8LSB0b2tlbnNfcmVwbGFjZShzcF9wMjAyMl90b2tlbnMsIHBhdHRlcm49ImtsaW1hemV1ZyIsIHJlcGxhY2VtZW50PSJrbGltYXpldWdzIiwgdmFsdWV0eXBlID0gImZpeGVkIikKc3BfcDIwMjJfdG9rZW5zIDwtIHRva2Vuc19yZXBsYWNlKHNwX3AyMDIyX3Rva2VucywgcGF0dGVybj0ia2xpbWF3ZW5kZWhhbCIsIHJlcGxhY2VtZW50PSJrbGltYXdlbmRlaGFscyIsIHZhbHVldHlwZSA9ICJmaXhlZCIpCgoKCgpmb3IgKHdvcmRfZm9ybSBpbiBbImdsYXViZW5zbGVocmUiXSl7CiAgd29yZCA9IGModW5saXN0X2Zvcm1zKHdvcmRfZm9ybSkpICMgdHVybiBpbnRvIGNvcnJlY3QgZm9ybWF0CiAgb3JpZ2luYWwgPC0gY29tcG91bmRfZGZbY29tcG91bmRfZGYkY29tcG91bmRfZm9ybXMgJWxpa2UlIHdvcmRbWzFdXSwgXSRvcmlnaW5hbFtbMV1dCiAgbGVtbWEgPC0gcmVwKG9yaWdpbmFsLCBsZW5ndGgod29yZCkpCgogICMgcmVwbGFjZSBzdHJpbmcgaW4gdG9rZW5zIHdpdGggbGVtbWEgZm9ybSAoZm9yIHBybzIwMDAgYW5kIGNvbnRyYTIwMDApCiAgcHJvMjAwMF90b2tlbnMgPC0gdG9rZW5zX3JlcGxhY2UocHJvMjAwMF90b2tlbnMsIHdvcmQsIGxlbW1hLCB2YWx1ZXR5cGUgPSAiZml4ZWQiKQogIGNvbnRyYTIwMDBfdG9rZW5zIDwtIHRva2Vuc19yZXBsYWNlKGNvbnRyYTIwMDBfdG9rZW5zLCB3b3JkLCBsZW1tYSwgdmFsdWV0eXBlID0gImZpeGVkIil9CmBgYAoKCmBgYHtyfQojIGZ1bmN0aW9uIHRvIHByZXByb2Nlc3MgY29tcG91bmRzIGRhdGEgZnJhbWUgCiMgdGhpcyBmdW5jdGlvbiB1bm5lc3RzIHRoZSBsaXN0IG9mIHdvcmQgZm9ybXMgZm9yIGVhY2ggY29tcG91bmQgYW5kIGNyZWF0ZXMgYSBsaXN0IGNvbnRhaW5pbmcgYWxsIHBvdGVudGlhbCB3b3JkIGZvcm1zIAp1bmxpc3RfZm9ybXMgPSBmdW5jdGlvbih3b3JkKXsKICB4IDwtIHVubGlzdChzdHJzcGxpdCh3b3JkLCAiLCIpKQogIHJldHVybihnc3ViKCIgIiwiIix4KSl9CgojIGFwcGx5IHRoZSBmdW5jdGlvbiB0byBvdXIgY29tcG91bmRzIGRhdGEgZnJhbWUKY29tcG91bmRfZm9ybXMgPC0gdW5saXN0X2Zvcm1zKGNvbXBvdW5kX2RmJGNvbXBvdW5kX2Zvcm1zKQoKCgojZm9yICh3b3JkIGluIGNvbXBvdW5kX2Zvcm1zKXsKICMgcHJpbnQod29yZCkKI30KCmNvbXBvdW5kX2Zvcm1zCmBgYAoK